1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.smartcrawler.common;
26
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.XMLConfiguration;

import java.io.File;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.TreeMap;
import org.smartcrawler.persistence.Persister;
import org.smartcrawler.retriever.Retriever;
38
39 /***
40 *
41 *
42 * @author <a href="mailto:pozzad@alice.it">Davide Pozza</a>
43 * @version <tt>$Revision: 1.12 $</tt>
44 */
45 public class ConfigReader {
46 /*** Creates a new instance of ConfigReader */
47 public ConfigReader() {
48 }
49
50 /***
51 *
52 * @param uri
53 * @throws org.apache.commons.configuration.ConfigurationException
54 * @return
55 */
56 public Context readConfig(String uri)
57 throws ConfigurationException {
58 if (uri == null) {
59 throw new IllegalArgumentException("URI passed is null");
60 }
61
62 return loadConfig(uri);
63 }
64
65 /***
66 *
67 * @param configFile
68 * @throws org.apache.commons.configuration.ConfigurationException
69 * @return
70 */
71 protected Context loadConfig(String configFile)
72 throws ConfigurationException {
73 Context c = new Context();
74 Configuration conf = new XMLConfiguration(configFile);
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91 c.setLoggers(buildLoggers(conf));
92
93
94
95 c.setPrecFiltersList(buildPrecFiltersList(conf));
96
97 c.setPostFiltersList(buildPostFiltersList(conf));
98
99 c.setEngineThreadNumber(buildEngineThreadNumber(conf));
100
101 c.setPersister(buildPersister(conf));
102 c.setRetriever(buildRetriever(conf));
103 return c;
104 }
105
106 /***
107 *
108 * @param conf
109 * @return
110 */
111 protected Collection buildPrecFiltersList(Configuration conf) {
112 TreeMap map = new TreeMap();
113
114 try {
115 int n = conf.getList("retriever.filters.filter.class").size();
116 for (int i = 0; i < n; i++) {
117 String key = conf.getString("retriever.filters.filter(" + i
118 + ").priority");
119 String className = conf.getString("retriever.filters.filter("
120 + i + ").class");
121 String filterName = conf.getString("retriever.filters.filter("
122 + i + ").name");
123
124 Object value = Class.forName(className).newInstance();
125
126 int np = conf.getList(
127 "retriever.filters.filter("
128 + i + ").filter-param.param-name")
129 .size();
130
131 if (np > 0) {
132 Hashtable params = new Hashtable();
133
134 for (int ip = 0; ip < np; ip++) {
135 String parName = conf.getString(
136 "retriever.filters.filter("
137 + i + ").filter-param(" + ip
138 + ").param-name");
139 String parValue = conf.getString(
140 "retriever.filters.filter("
141 + i + ").filter-param(" + ip
142 + ").param-value");
143
144 params.put(parName, parValue);
145 }
146
147 if (value instanceof AbstractParametrizableComponent) {
148 ((AbstractParametrizableComponent) value).setParameters(params);
149 }
150 }
151
152 map.put(key, value);
153 }
154
155 return map.values();
156 } catch (Exception e) {
157 e.printStackTrace();
158 return null;
159 }
160 }
161
162 /***
163 *
164 * @param conf
165 * @return
166 */
167 protected Collection buildPostFiltersList(Configuration conf) {
168 TreeMap map = new TreeMap();
169
170 try {
171 int n = conf.getList("persister.filters.filter.class").size();
172
173 for (int i = 0; i < n; i++) {
174 String key = conf.getString("persister.filters.filter(" + i
175 + ").priority");
176 String className = conf.getString("persister.filters.filter("
177 + i + ").class");
178 Object value = Class.forName(className).newInstance();
179
180 int np = conf.getList(
181 "persister.filters.filter("
182 + i + ").filter-param.param-name")
183 .size();
184
185 if (np > 0) {
186 Hashtable params = new Hashtable();
187
188 for (int ip = 0; ip < np; ip++) {
189 params.put(conf.getString(
190 "persister.filters.filter("
191 + i + ").filter-param(" + ip
192 + ").param-name"),
193 conf.getString(
194 "persister.filters.filter("
195 + i + ").filter-param(" + ip
196 + ").param-value"));
197 }
198
199 if (value instanceof AbstractParametrizableComponent) {
200 ((AbstractParametrizableComponent) value).setParameters(params);
201 }
202 }
203
204 map.put(key, value);
205 }
206
207 return map.values();
208 } catch (Exception e) {
209 e.printStackTrace();
210 return null;
211 }
212 }
213
214 /***
215 *
216 * @param conf
217 * @return
218 */
219 protected int buildEngineThreadNumber(Configuration conf) {
220 return conf.getInteger("engine.threadsNumber", new Integer("5"))
221 .intValue();
222 }
223
224 /***
225 *
226 * @param conf
227 * @return
228 */
229 protected boolean buildMultiThread(Configuration conf) {
230 return conf.getBoolean("engine.isMultiThread", true);
231 }
232
233 /***
234 *
235 * @param conf
236 * @return
237 */
238 protected Hashtable buildLoggers(Configuration conf) {
239 Hashtable loggers = new Hashtable();
240
241 loggers.put("TRACER", "no");
242 loggers.put("ACCESS", "no");
243 loggers.put("LINK", "no");
244 loggers.put("PERMISSIONS", "no");
245 loggers.put("EXTRACTOR", "no");
246 loggers.put("CONSOLE", "no");
247 loggers.put("PERSISTER", "no");
248 loggers.put("PROVIDER", "no");
249
250 try {
251 int n = conf.getList("loggers.logger[@active]").size();
252
253 for (int i = 0; i < n; i++) {
254 String active = conf.getString("loggers.logger(" + i
255 + ")[@active]");
256 String str = conf.getString("loggers.logger(" + i + ")[@type]");
257
258 loggers.put(str, active);
259 }
260 } catch (Exception e) {
261 }
262
263 return loggers;
264 }
265
266 /***
267 *
268 * @param conf
269 * @return
270 */
271
272 protected Persister buildPersister(Configuration conf) {
273 try {
274
275 String className = conf.getString("persister.class");
276 Persister persister = (Persister)Class.forName(className).newInstance();
277
278 int np = conf.getList(
279 "persister.persister-params.persister-param.param-name").size();
280 if (np > 0) {
281 Hashtable params = new Hashtable();
282
283 for (int ip = 0; ip < np; ip++) {
284 params.put(
285 conf.getString(
286 "persister.persister-params.persister-param("+ip+").param-name"),
287 conf.getString(
288 "persister.persister-params.persister-param("+ip+").param-value"));
289 }
290
291 if (persister instanceof AbstractParametrizableComponent) {
292 ((AbstractParametrizableComponent) persister).setParameters(params);
293 }
294 }
295
296 return persister;
297 } catch (Exception e) {
298 e.printStackTrace();
299 return null;
300 }
301 }
302
303 protected Retriever buildRetriever(Configuration conf) {
304 try {
305
306 String className = conf.getString("retriever.class");
307 Retriever retriever = (Retriever)Class.forName(className).newInstance();
308
309 int np = conf.getList(
310 "retriever.retriever-params.retriever-param.param-name").size();
311
312 if (np > 0) {
313 Hashtable params = new Hashtable();
314
315 for (int ip = 0; ip < np; ip++) {
316 params.put(
317 conf.getString(
318 "retriever.retriever-params.retriever-param("+ip+").param-name"),
319 conf.getString(
320 "retriever.retriever-params.retriever-param("+ip+").param-value"));
321 }
322
323 if (retriever instanceof AbstractParametrizableComponent) {
324 ((AbstractParametrizableComponent) retriever).setParameters(params);
325 }
326 }
327
328 return retriever;
329 } catch (Exception e) {
330 e.printStackTrace();
331 return null;
332 }
333 }
334
335 }